# ============================================================================
# Script: Justifying European Border Policies — Cluster Analysis
# Author: Lisa Herbig
# Project: Justifying European Border Policies (JEPP)
# Date: 27 November 2025
# ============================================================================


# ------------------------------------------------------------------------------
# Libraries
# ------------------------------------------------------------------------------
library(readr)
library(data.table)
library(dplyr)
library(tidyr)
library(ggplot2)
library(cluster)
library(vegan)       # distance
library(factoextra)  # elbow/nbclust
library(corrplot)
library(vcd)
library(changepoint)
library(lubridate)
library(scales)

# ------------------------------------------------------------------------------
# Output helpers + folders
# ------------------------------------------------------------------------------
setwd("~/OneDrive - UvA/1. Justification Paper/JEPP_Submission/JustifyingEuropeanBorderPolicies_SupplementaryMaterials")

dir.create("figures", showWarnings = FALSE, recursive = TRUE)
dir.create("tables",  showWarnings = FALSE, recursive = TRUE)
dir.create("exports", showWarnings = FALSE, recursive = TRUE)

save_png <- function(plot_obj, filename, width = 9, height = 6, dpi = 300) {
  ggsave(filename = file.path("figures", filename),
         plot = plot_obj, width = width, height = height, dpi = dpi)
}
save_base_jpeg <- function(filename, code, width = 2400, height = 1600, res = 300) {
  png(filename = file.path("figures", filename), width = width, height = height, res = res)
  on.exit(dev.off(), add = TRUE); force(code)
}

# ------------------------------------------------------------------------------
# Load dataset
# ------------------------------------------------------------------------------
data <- fread("~/OneDrive - UvA/1. Justification Paper/JEPP_Submission/JustifyingEuropeanBorderPolicies_SupplementaryMaterials/data/final_reshaped_data.csv")

# ------------------------------------------------------------------------------
# Preparation: keep rows with at least one justification coded
# ------------------------------------------------------------------------------
justification_vars <- grep("^J_", names(data), value = TRUE)
data_filtered <- data[rowSums(!is.na(data[, ..justification_vars])) > 0]

# Save full justification dataset
write.csv(data_filtered, "data/justification_full_dataset.csv", row.names = FALSE)
saveRDS(data_filtered, "data/justification_full_dataset.rds")

# ------------------------------------------------------------------------------
# Keep binary columns only (0/1/NA), then tidy solidarity fields
# ------------------------------------------------------------------------------
binary_cols <- sapply(data_filtered, function(x) all(x %in% c(0, 1, NA)))
data_binary <- data_filtered[, .SD, .SDcols = binary_cols]

# Optional NA audits
na_counts  <- sapply(data_binary, function(x) sum(is.na(x)))
na_summary <- data.table(Variable = names(na_counts), NA_Count = na_counts)

# Rename & drop as in original workflow
data_binary <- data_binary %>%
  rename(J_J_OA = J_S_O_B,
         J_J_SOL = J_SOL)

j_s_vars <- grep("^J_S_", names(data_binary), value = TRUE)
if (length(j_s_vars)) data_binary[, (j_s_vars) := NULL]

# Ensure OA is 0 (not NA)
data_binary$J_J_OA <- ifelse(is.na(data_binary$J_J_OA), 0, data_binary$J_J_OA)

# Optional NA audits
na_counts2  <- sapply(data_binary, function(x) sum(is.na(x)))
na_summary2 <- data.table(Variable = names(na_counts2), NA_Count = na_counts2)

# Save binary (incl. solidarity) and final binary datasets
write.csv(data_binary, "data/justification_binary_dataset_SOL.csv", row.names = FALSE)
saveRDS(data_binary, "data/justification_binary_dataset_SOL.rds")
write.csv(
  data_binary,
  file = "~/OneDrive - UvA/1. Justification Paper/JEPP_Submission/JustifyingEuropeanBorderPolicies_SupplementaryMaterials/data/justification_binary_dataset.csv",
  row.names = FALSE
)
saveRDS(
  data_binary,
  file = "~/OneDrive - UvA/1. Justification Paper/JEPP_Submission/JustifyingEuropeanBorderPolicies_SupplementaryMaterials/data/justification_binary_dataset.rds"
)

# ------------------------------------------------------------------------------
# Clustering on selected variables
# ------------------------------------------------------------------------------
justification_binary_dataset <- fread("~/OneDrive - UvA/1. Justification Paper/JEPP_Submission/JustifyingEuropeanBorderPolicies_SupplementaryMaterials/data/justification_binary_dataset.csv")
justification_full_dataset   <- fread("~/OneDrive - UvA/1. Justification Paper/JEPP_Submission/JustifyingEuropeanBorderPolicies_SupplementaryMaterials/data/justification_full_dataset.csv")

clustering_variables <- justification_full_dataset[, c(
  "CTX_MIG", "CTX_PAN",
  "DIR_MR", "DIR_LR", "DIR_M",
  "POL_CON", "POL_VISA", "POL_SEC", "POL_QUO", "POL_ASY",
  "J_SOL",
  "T_PP", "T_CP", "T_FP", "T_HP",
  "J_J_SEC", "J_J_ECO", "J_J_PH", "J_J_EU", "J_J_LEG", "J_J_EFE", "J_J_HUM",
  "J_POS_SP", "J_POS_OP"     # (J_J_OA intentionally excluded)
)]

# Drop cases with missing values in key controls
justification_full_dataset <- justification_full_dataset %>%
  filter(!is.na(DIR_MR) & !is.na(DIR_LR) & !is.na(DIR_M) &
           !is.na(J_POS_SP) & !is.na(J_SOL) & !is.na(J_POS_OP))

# Remove rows with any NA across clustering vars
clustering_variables <- na.omit(clustering_variables)

# Force binary numeric (0/1) for distance calculation
clustering_variables[] <- lapply(clustering_variables, function(x) as.numeric(x == 1))

# Drop very rare variables (≤ 5% prevalence)
variable_proportion <- colSums(clustering_variables != 0) / nrow(clustering_variables)
threshold <- 0.05
variables_to_keep <- names(variable_proportion[variable_proportion > threshold])
clustering_variables <- clustering_variables %>% dplyr::select(all_of(variables_to_keep))

# ------------------------------------------------------------------------------
# Distance, clustering, and diagnostics (save plots)
# ------------------------------------------------------------------------------
dissimilarity_matrix_complete <- vegdist(clustering_variables, method = "gower", binary = TRUE)
hc_main <- hclust(dissimilarity_matrix_complete, method = "complete")

# Dendrogram (base)
save_base_jpeg("dendrogram_jaccard_complete.jpg", {
  plot(hc_main,
       main = "Hierarchical Clustering Dendrogram using Jaccard Distance",
       xlab = "", sub = "")
})

# Elbow/WSS diagnostic
p_elbow <- fviz_nbclust(clustering_variables, hcut, method = "wss",
                        diss = dissimilarity_matrix_complete)
save_png(p_elbow, "appendixD_elbow_wss.jpg")

# ------------------------------------------------------------------------------
# Summaries for k = 3 and k = 4
# ------------------------------------------------------------------------------
cluster_summary <- function(data, cluster_col) {
  data %>%
    group_by(.data[[cluster_col]]) %>%
    summarise(
      count = n(),
      across(where(is.numeric), list(mean = ~ mean(.), sd = ~ sd(.)), .names = "{col}_{fn}")
    )
}

clusters_k3 <- cutree(hc_main, k = 3)
clustering_variables$cluster_k3 <- as.factor(clusters_k3)

clusters_k4 <- cutree(hc_main, k = 4)
clustering_variables$cluster_k4 <- as.factor(clusters_k4)

# k = 3
cluster_summary_k3 <- cluster_summary(clustering_variables, "cluster_k3") %>%
  group_by(cluster_k3) %>%
  summarise_all(mean, na.rm = TRUE)
print(cluster_summary_k3, n = Inf, width = Inf)

# k = 4
cluster_summary_k4 <- cluster_summary(clustering_variables, "cluster_k4") %>%
  group_by(cluster_k4) %>%
  summarise_all(mean, na.rm = TRUE)
print(cluster_summary_k4, n = Inf, width = Inf)  # PAPER TABLE 2

# Save as CSV
write.csv(cluster_summary_k4,
          file = "tables/cluster_summary_k4.csv",
          row.names = FALSE)

# ------------------------------------------------------------------------------
# Attach cluster_k4 back to full dataset (and save the dataset with clusters)
# ------------------------------------------------------------------------------
vars <- c("CTX_MIG","CTX_PAN","DIR_MR","DIR_LR","DIR_M",
          "POL_CON","POL_VISA","POL_SEC","POL_QUO","POL_ASY",
          "J_SOL","T_PP","T_CP","T_FP","T_HP",
          "J_J_SEC","J_J_ECO","J_J_PH","J_J_EU","J_J_LEG","J_J_EFE","J_J_HUM",
          "J_POS_SP","J_POS_OP")

cl_src <- justification_full_dataset %>%
  mutate(row_id = row_number()) %>%
  select(row_id, all_of(vars)) %>%
  tidyr::drop_na()

X <- cl_src %>% select(-row_id)
X[] <- lapply(X, function(x) as.numeric(x == 1))

prop <- colSums(X != 0) / nrow(X)
keep <- names(prop[prop > 0.05])
X <- X %>% dplyr::select(all_of(keep))

d  <- vegan::vegdist(X, method = "gower", binary = TRUE)
hc <- hclust(d, method = "complete")
clusters_k4 <- cutree(hc, k = 4)

clusters_df <- tibble::tibble(row_id = cl_src$row_id,
                              cluster_k4 = factor(clusters_k4))

justification_full_dataset <- justification_full_dataset %>%
  mutate(row_id = row_number()) %>%
  left_join(clusters_df, by = "row_id")

# Save the dataset that includes clusters
write.csv(justification_full_dataset,
          file = "exports/justification_full_dataset_with_clusters.csv",
          row.names = FALSE)
saveRDS(justification_full_dataset,
        file = "exports/justification_full_dataset_with_clusters.rds")

# ------------------------------------------------------------------------------
# Research Question 2: Actors × Clusters (save all plots)
# ------------------------------------------------------------------------------
justification_full_dataset <- justification_full_dataset %>% filter(J_APOS != 0)
table_original <- justification_full_dataset %>%
  filter(!is.na(cluster_k4)) %>%
  with(table(J_APOS, cluster_k4))
print(table_original)

chi_original <- chisq.test(table_original, simulate.p.value = TRUE, B = 10000)
print(chi_original)

# Standardized residuals
residuals_original <- chi_original$stdres
rownames(residuals_original) <- c(
  "German Chancellor",                  # 1
  "Spokesperson: Foreign Ministry",     # 5
  "Spokesperson: Ministry of the Interior",  # 6
  "Spokesperson: Other Ministry",            # 8
  "Spokesperson: Federal Government",        # 9
  "State Minister (Ministerpräsident)", # 10
  "Other"                               # 11
)
colnames(residuals_original) <- c(
  "Legal-Solidarity", 
  "Security-Enforcement", 
  "Public-Health-Emergency", 
  "Critical-Evaluation"
)

# PAPER FIGURE 4: corrplot of residuals (base)
save_base_jpeg("paper_figure_4_corrplot_residuals.jpg", {
  corrplot(residuals_original,
           is.cor = FALSE, tl.col = "black", tl.cex = 0.8,
           col = colorRampPalette(c("#CDBE70", "white", "#003366"))(200),
           cl.pos = "r", cl.cex = 0.7, cl.align.text = "r", cl.ratio = 0.4)
})

# Cells with |stdres| > 2
sig_cells <- which(abs(residuals_original) > 2, arr.ind = TRUE)
print(sig_cells)

# Cramér's V
cramers_v <- assocstats(table_original)$cramer
print(cramers_v)

# Labels and filtered dataset for plots
justification_full_dataset$actor_labels <- factor(justification_full_dataset$J_APOS,
                                                  levels = 1:11,
                                                  labels = c("German Chancellor","Foreign Minister","Minister of the Interior","Other Minister",
                                                             "Spokesperson: Foreign Ministry","Spokesperson: Ministry of the Interior","Spokesperson: Health Ministry",
                                                             "Spokesperson: Other Ministry","Spokesperson: Federal Government","State Minister (Ministerpraesident)","Other")
)
filtered_dataset <- subset(justification_full_dataset, !is.na(actor_labels) & actor_labels != "Other")

# Paper Figure 3 (version 1)
p_fig3 <- ggplot(filtered_dataset, aes(x = actor_labels, fill = factor(cluster_k4))) +
  geom_bar(position = "fill") +
  labs(x = "Actor", y = "Proportion") +
  scale_fill_manual(values = c("#003366", "#99CCFF", "#CDBE70", "#EEDC82"),
                    labels = c("Legal-Solidarity","Security-Enforcement","Public Health Emergency","Critical Evaluation"),
                    name = "Cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
save_png(p_fig3, "paper_figure_3.jpg")


# Stacked bars: actors within clusters
p_actors_across <- ggplot(justification_full_dataset, aes(x = factor(cluster_k4), fill = factor(J_APOS))) +
  geom_bar(position = "fill") +
  labs(x = "Cluster", y = "Proportion", fill = "Actor") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_manual(
    values = c("#003366","#336699","#6699CC","#6CA6CD","#99CCFF",
               "#CDBE70","#FFCC00","#90EE90","#EEDC82","#8b814c"),
    labels = c("German Chancellor","Foreign Minister","Minister of the Interior",
               "Other Minister","Spokesperson: Foreign Ministry","Spokesperson: Ministry of the Interior",
               "Spokesperson: Health Ministry","Spokesperson: Other Ministry",
               "Spokesperson: Federal Government","State Minister (Ministerpraesident)")
  ) +
  scale_x_discrete(labels = c("Legal-Solidarity","Security-Enforcement",
                              "Public Health Emergency","Critical Evaluation")) +
  theme_minimal()
save_png(p_actors_across, "actors_across_clusters.jpg")

# Proportion table (columns = clusters)
actor_cluster_table <- with(justification_full_dataset, table(J_APOS, cluster_k4))
actor_cluster_prop  <- prop.table(actor_cluster_table, margin = 2)
write.csv(as.data.frame.matrix(actor_cluster_prop),
          "tables/actors_x_clusters_col_props.csv", row.names = TRUE)

# ------------------------------------------------------------------------------
# Research Question 3: Change over time (save all plots)
# ------------------------------------------------------------------------------
# Parse date and build derived periods
justification_full_dataset <- justification_full_dataset %>%
  mutate(DATE = as.Date(DATE, format = "%m/%d/%Y"),
         Month_Year = format(DATE, "%Y/%m"))

# Daily counts per cluster (wide)
cluster_time_series <- justification_full_dataset %>%
  group_by(DATE, cluster_k4) %>%
  summarize(Count = n(), .groups = 'drop') %>%
  spread(cluster_k4, Count, fill = 0) %>%
  arrange(DATE)

# Daily change point plots (base)
result_cluster_1 <- cpt.mean(cluster_time_series$`1`)
result_cluster_2 <- cpt.mean(cluster_time_series$`2`)
result_cluster_3 <- cpt.mean(cluster_time_series$`3`)
result_cluster_4 <- cpt.mean(cluster_time_series$`4`)
save_base_jpeg("cpt_daily_cluster1.jpg", { plot(result_cluster_1, main="Change Point Detection for Cluster 1") })
save_base_jpeg("cpt_daily_cluster2.jpg", { plot(result_cluster_2, main="Change Point Detection for Cluster 2") })
save_base_jpeg("cpt_daily_cluster3.jpg", { plot(result_cluster_3, main="Change Point Detection for Cluster 3") })
save_base_jpeg("cpt_daily_cluster4.jpg", { plot(result_cluster_4, main="Change Point Detection for Cluster 4") })

# Half-year fields
justification_full_dataset <- justification_full_dataset %>%
  mutate(
    DATE = as.Date(DATE, format = "%Y-%m-%d"),  # robust re-parse if needed
    Month_Year = format(DATE, "%Y-%m"),
    Year = year(DATE),
    Month = month(DATE),
    Half = if_else(Month <= 6, "H1", "H2"),
    Half_Year = paste(Year, Half)
  )

# Half-year aggregation (wide)
cluster_time_seriesHY <- justification_full_dataset %>%
  group_by(Half_Year, cluster_k4) %>%
  summarize(Count = n(), .groups = 'drop') %>%
  spread(cluster_k4, Count, fill = 0) %>%
  arrange(Half_Year)

# Helper for half-year change point plots (base)
run_cpt_analysis_hy <- function(cluster_id, title_main, x_positions, x_labels) {
  data_vec <- cluster_time_seriesHY[[as.character(cluster_id)]]
  result <- cpt.mean(data_vec)
  plot(result, main = title_main, xaxt = "n", xlab = "", ylab = "Justification Count",
       cpt.col = "#99CCFF", lwd = 2)
  axis(1, at = x_positions, labels = x_labels, las = 2)
  mtext("Time", side = 1, line = 4.3)
  abline(v = cpts(result), col = "#90EE90", lwd = 3, lty = 2)
  invisible(result)
}

half_year_labels <- cluster_time_seriesHY$Half_Year
half_year_sequence <- seq_along(half_year_labels)

# PAPER FIGURE 6: half-year change point plots (base)
save_base_jpeg("cpt_halfyear_cluster1.jpg", {
  run_cpt_analysis_hy("1",
                      "Change Point Detection: Legal-Solidarity Justification Approach (Half-Year)",
                      half_year_sequence, half_year_labels)
})
save_base_jpeg("cpt_halfyear_cluster2.jpg", {
  run_cpt_analysis_hy("2",
                      "Change Point Detection: Security-Enforcement Justification Approach (Half-Year)",
                      half_year_sequence, half_year_labels)
})
save_base_jpeg("cpt_halfyear_cluster3.jpg", {
  run_cpt_analysis_hy("3",
                      "Change Point Detection: Public-Health Emergency Justification Approach (Half-Year)",
                      half_year_sequence, half_year_labels)
})
save_base_jpeg("cpt_halfyear_cluster4.jpg", {
  run_cpt_analysis_hy("4",
                      "Change Point Detection: Critical-Evaluation Justification Approach (Half-Year)",
                      half_year_sequence, half_year_labels)
})


save_base_jpeg("paper_figure_6_changepoint.jpg", {
  # Set up 2x2 layout with tighter spacing
  par(mfrow = c(2, 2), 
      mar = c(5, 3, 2, 1),
      oma = c(3, 3, 4, 1),
      cex.axis = 0.7)
  
  # Shorter titles for better fit
  run_cpt_analysis_hy("1", "Cluster 1: Legal-Solidarity",
                      half_year_sequence, half_year_labels)
  
  run_cpt_analysis_hy("2", "Cluster 2: Security-Enforcement",
                      half_year_sequence, half_year_labels)
  
  run_cpt_analysis_hy("3", "Cluster 3: Public-Health Emergency",
                      half_year_sequence, half_year_labels)
  
  run_cpt_analysis_hy("4", "Cluster 4: Critical-Evaluation",
                      half_year_sequence, half_year_labels)
})

# Counts (save tables)
monthly_counts <- justification_full_dataset %>%
  group_by(Month_Year) %>% summarize(N = n())
yearly_counts <- justification_full_dataset %>%
  group_by(Year) %>% summarize(N = n())
halfyear_counts <- justification_full_dataset %>%
  group_by(Half_Year) %>% summarize(N = n())

write.csv(monthly_counts,  "tables/cluster_monthly_justification_counts.csv",  row.names = FALSE)
write.csv(yearly_counts,   "tables/cluster_yearly_justification_counts.csv",   row.names = FALSE)
write.csv(halfyear_counts, "tables/cluster_halfyear_justification_counts.csv", row.names = FALSE)

# PAPER FIGURE 5: proportions by half-year (faceted)
# proportions per half-year
cluster_percentage_data <- justification_full_dataset %>%
  group_by(Half_Year, cluster_k4) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  left_join(justification_full_dataset %>% count(Half_Year, name = "Total"),
            by = "Half_Year") %>%
  mutate(Percentage = Count / Total * 100) %>%
  mutate(
    cluster_k4 = factor(cluster_k4,
                        levels = c("1", "2", "3", "4"),
                        labels = c("1 Legal-Solidarity", "2 Security-Enforcement",
                                   "3 Public Health Emergency", "4 Critical Evaluation")),
    # keep Half_Year as an ordered factor used on the x-axis
    Half_Year = factor(Half_Year, levels = sort(unique(Half_Year)))
  )

# ---- EVENT MARKERS (robust) ----
# define events by real dates, then convert to the SAME Half_Year factor as above
events_df <- tibble::tibble(
  event_date = as.Date(c("2015-09-01",   # Migration crisis peak window
                         "2020-03-16",   # COVID border closures (DE)
                         "2022-02-24")), # Russia invades Ukraine
  label = c("Migration Crisis Peak", "COVID Border Closure", "Ukraine Invasion")
) %>%
  mutate(
    Year = lubridate::year(event_date),
    Half = dplyr::if_else(lubridate::month(event_date) <= 6, "H1", "H2"),
    Half_Year = paste(Year, Half),
    # match the plotting factor levels exactly
    Half_Year = factor(Half_Year, levels = levels(cluster_percentage_data$Half_Year)),
    x_idx = match(Half_Year, levels(cluster_percentage_data$Half_Year)),
    # small right nudge so labels don’t collide with bars
    x_pos = x_idx + 0.8,
    y = 0.7
  )

# plot with event lines + labels
p_fig5 <- ggplot(cluster_percentage_data,
                 aes(x = Half_Year, y = Percentage / 100, fill = cluster_k4)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1.3)) +
  scale_fill_manual(values = c("#003366", "#99CCFF", "#CDBE70", "#EEDC82"), name = "Cluster") +
  labs(x = "Half-Year", y = "Percentage of Observations") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank()) +
  facet_wrap(~ cluster_k4, scales = "free_y") +
  # dashed verticals at the matched x indices
  geom_vline(xintercept = events_df$x_idx, linetype = "dashed", color = "black") +
  # rotated text labels (use inherit.aes = FALSE to avoid facet fill/colour issues)
  geom_text(data = events_df,
            aes(x = x_pos, y = y, label = label),
            angle = 90, hjust = 0, size = 2.5,
            inherit.aes = FALSE)
save_png(p_fig5, "paper_figure_5.jpg")

# Appendix by year
cluster_percentage_year <- justification_full_dataset %>%
  group_by(Year, cluster_k4) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  left_join(justification_full_dataset %>% count(Year, name = "Total"), by = "Year") %>%
  mutate(Percentage = Count / Total * 100) %>%
  mutate(cluster_k4 = factor(cluster_k4,
                             levels = c("1", "2", "3", "4"),
                             labels = c("1 Legal-Solidarity", "2 Security-Enforcement",
                                        "3 Public Health Emergency", "4 Critical Evaluation")))
p_year <- ggplot(cluster_percentage_year, aes(x = as.factor(Year), y = Percentage / 100, fill = factor(cluster_k4))) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  scale_fill_manual(values = c("#003366", "#99CCFF", "#CDBE70", "#EEDC82"), name = "Cluster") +
  labs(title = "Usage of the Different Justification Approaches Over Time (Yearly)",
       x = "Year", y = "Percentage of Observations") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~ cluster_k4, scales = "free_y")
save_png(p_year, "appendixF1_usage_yearly.jpg")

# Appendix by month
cluster_percentage_month <- justification_full_dataset %>%
  group_by(Month_Year, cluster_k4) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  left_join(justification_full_dataset %>% count(Month_Year, name = "Total"),
            by = "Month_Year") %>%
  mutate(Percentage = Count / Total * 100) %>%
  mutate(cluster_k4 = factor(cluster_k4,
                             levels = c("1", "2", "3", "4"),
                             labels = c("1 Legal-Solidarity", "2 Security-Enforcement",
                                        "3 Public Health Emergency", "4 Critical Evaluation")))
p_month <- ggplot(cluster_percentage_month, aes(x = Month_Year, y = Percentage / 100, fill = factor(cluster_k4))) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  scale_fill_manual(values = c("#003366", "#99CCFF", "#CDBE70", "#EEDC82"), name = "Cluster") +
  labs(title = "Usage of the Different Justification Approaches Over Time (Monthly)",
       x = "Month", y = "Percentage of Observations") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  facet_wrap(~ cluster_k4, scales = "free_y")
save_png(p_month, "appendixF2_usage_monthly.jpg")

#End
